// (C) AlgoEngineering LLC 2008-2013
// (C) 2013-2019 NYSE | Intercontinental Exchange
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <https://www.gnu.org/licenses/>.
//
// Contacting ICE: <https://www.theice.com/contact>
//
// Target: algo_lib (lib) -- Support library for all executables
// Exceptions: NO
// Source: cpp/lib/algo/backtrace.cpp -- Print stack trace in case of crash
//
// Created By: alexei.lebedev jeffrey.wang
// Authors: alexei.lebedev
// Recent Changes: alexei.lebedev jeffrey.wang
//
// To catch fatal signal, one should issue algo::SetupFatalSignals().
// Tested on a sample with a static function:
// $ dflt.debug-x86_64/samp_regx  dd
// algo_lib.fatal  signal:Segmentation fault  location:0x004670ad
// dev.gitinfo  date:'2014-07-17 01:01:49 -0400'  author:alebedev@nyx.com  cfg:g++/debug.Linux-x86_64  commit:e3c6e41e6fcae607da3baacd6b3c9e8f7bd19023
// algo_lib.retaddr:0x004670ad  objfile:dflt.debug-x86_64/samp_regx  loadaddr:0x00400000  symbol:"TestFunc()"  symaddr:0x0046709d
// algo_lib.retaddr:0x004671e5  objfile:dflt.debug-x86_64/samp_regx  loadaddr:0x00400000  symbol:"Main(int, char**)"  symaddr:0x004670b5
// algo_lib.retaddr:0x00466fbb  objfile:dflt.debug-x86_64/samp_regx  loadaddr:0x00400000  symbol:main  symaddr:0x00466f98
// algo_lib.retaddr:0x7f3b24d64d1d  objfile:/lib64/libc.so.6  loadaddr:0x7f3b24d46000  symbol:__libc_start_main  symaddr:0x7f3b24d64c20
// algo_lib.retaddr:0x00464959  objfile:dflt.debug-x86_64/samp_regx  loadaddr:0x00400000  symbol:""  symaddr:0x00000000
// Segmentation fault
// Polov: re-entrance check in segfault handler: it is not needed.
// First of all I could not reproduce such situation.  Handler failure
// silently produces core with evidence of this failure.
// Looking at the source I see that Linux kernel has efficient
// protection against such 'double' failure.  First of all, handled
// signal is blocked before entering userspace.  When signal handler
// finishes, it pops __restore_rt frame from stack and it enters
// kernel space again with syscall, where signal is unblocked.  All
// trap signals are generated by kernel in "forced" manner, that means
// the signal can't be ignored or blocked. For such case, kernel
// brutally sets SIG_DFL handler and discards any user handler.
// Even more, for example, getting SIGILL or SIGFPE in SIGSEGV handler
// also invokes default handler which silently produces core.  I could
// not found the reason of such behavior in kernel code, but it really
// works.
// For thread safety: I think we also do not need to check
// re-entrance, as we do not modify global things in the handler.
// Comment for fprintf(): I guess we have to add fflush() at least, as
// _exit() may discard buffer for buffered stream (I prefer exit() for
// that reason). Better to use write().
// #AL#: we don't need fflush,. because stderr is line-buffered
// by default.

#ifdef __MACH__
// 'The deprecated ucontext routines require _XOPEN_SOURCE to be defined'
#define _XOPEN_SOURCE
#endif

#include <dlfcn.h>
#ifndef __MACH__
#include <elf.h>
#endif
#include <execinfo.h>// backtrace
#include <ucontext.h>
#include <cxxabi.h>// abi::__cxa_demangle

// we need to silently break out on any error to do not cause segfault or exception
#define break_if_not_(x) if (LIKELY(x)) ; else break

// Print backtrace symbols
static void BacktraceSymbols_Print(aryptr<void*> addrlist, algo::cstring &lhs) {
#ifdef __MACH__
    //
    (void)addrlist;
    (void)lhs;
#else
    // static symbol table
    int         fd        = -1;    // file descriptor to read
    Elf64_Ehdr  ehdr;              // ELF header, uninitilaized
    bool        ehdr_ok   = false; //
    Elf64_Shdr *shdr      = NULL;  // section headers
    size_t      shsize    = 0;     // size of section headers in bytes
    Elf64_Shdr *sh_symtab = NULL;  // .symtab section header
    Elf64_Shdr *sh_strtab = NULL;  // .strtab section header
    Elf64_Sym  *symtab    = NULL;  // symbol table
    char       *strtab    = NULL;  // string table
    u32         symnum    = 0;     // number of entries in symbol table
    bool        symtab_ok = false;

    // load static symbol table
    do {
        // open file
        break_if_not_(-1 != (fd = open("/proc/self/exe",O_RDONLY)));

        // load ELF header
        break_if_not_(sizeof(ehdr) == read(fd,&ehdr,sizeof(ehdr)));

        // verify format - ELF64, valid ELF header, valid section header size
        ehdr_ok  = ehdr.e_ident[EI_MAG0]  == ELFMAG0;
        ehdr_ok &= ehdr.e_ident[EI_MAG1]  == ELFMAG1;
        ehdr_ok &= ehdr.e_ident[EI_MAG2]  == ELFMAG2;
        ehdr_ok &= ehdr.e_ident[EI_MAG3]  == ELFMAG3;
        ehdr_ok &= ehdr.e_ident[EI_CLASS] == ELFCLASS64;
        ehdr_ok &= ehdr.e_ehsize          == sizeof(ehdr);
        ehdr_ok &= ehdr.e_shentsize       == sizeof(Elf64_Shdr);
        ehdr_ok &= ehdr.e_shnum           <= 100; // coverity, reasonable maximum
        break_if_not_(ehdr_ok);

        // load section headers
        break_if_not_(0 != (shsize = sizeof(Elf64_Shdr)*ehdr.e_shnum)); // no sections - unlikely, but just in case
        break_if_not_(NULL != (shdr = (Elf64_Shdr *)malloc(shsize))); // failed to allocate memory
        break_if_not_(-1 != lseek(fd,ehdr.e_shoff,SEEK_SET)); // failed to lseek
        break_if_not_((int)shsize == read(fd,shdr,shsize));

        // find .symtab and .strtab sections
        for (u16 i=0; i<ehdr.e_shnum; ++i) {
            if (shdr[i].sh_type == SHT_SYMTAB) {
                sh_symtab = shdr+i;
                // associated strtab section can be found by sh_link
                sh_strtab = sh_symtab->sh_link < ehdr.e_shnum && shdr[sh_symtab->sh_link].sh_type == SHT_STRTAB ? shdr+sh_symtab->sh_link : NULL;
                break;
            }
        }
        break_if_not_(sh_symtab&&sh_strtab);
        break_if_not_(sh_symtab->sh_entsize == sizeof(Elf64_Sym));
        break_if_not_(sh_symtab->sh_size <= 10*1024*1024); // coverity, resonable maximum
        // load symtab
        break_if_not_(NULL != (symtab = (Elf64_Sym *)malloc(sh_symtab->sh_size))); // failed to allocate memory
        break_if_not_(-1 != lseek(fd,sh_symtab->sh_offset,SEEK_SET)); // failed to lseek
        break_if_not_((int)sh_symtab->sh_size == read(fd,symtab,sh_symtab->sh_size)); // failed to read symtab
        break_if_not_(0 != (symnum = sh_symtab->sh_size/sizeof(Elf64_Sym))); // there is at least one entry

        // load strtab
        break_if_not_(NULL != (strtab = (char *)malloc(sh_strtab->sh_size))); // failed to allocate memory
        break_if_not_(-1 != lseek(fd,sh_strtab->sh_offset,SEEK_SET)); // failed to lseek
        break_if_not_((int)sh_strtab->sh_size == read(fd,strtab,sh_strtab->sh_size)); // failed to read strtab

        // finally set ok flag
        symtab_ok = true;
#undef break_if_not_
    } while (false);

    // close file
    if (-1 != fd) close(fd);

    // now resolve addresses
    frep_(i,elems_N(addrlist)) {
        Dl_info info;
        dladdr(addrlist[i], &info);
        tempstr msg;

        msg << "  algo_lib.retaddr:";
        msg << info.dli_fname;
        msg << ".";
        u64_PrintHex(u64(addrlist[i]),msg,8);

        const char *sname = info.dli_sname;
        u64 saddr = u64(info.dli_saddr);

        if (!saddr && symtab_ok) {
            // attempt to resolve via static table
            for (u32 j=0; j<symnum; ++j) {
                bool ok = ELF64_ST_TYPE(symtab[j].st_info) == STT_FUNC;
                ok = ok && u64(addrlist[i]) >= symtab[j].st_value;
                ok = ok && u64(addrlist[i]) < symtab[j].st_value+symtab[j].st_size;
                ok = ok && symtab[j].st_name < sh_strtab->sh_size;
                if (ok) {
                    saddr = symtab[j].st_value;
                    sname = strtab+symtab[j].st_name;
                    break;
                }
            }
        }

        msg<<"  symaddr:";
        u64_PrintHex(saddr,msg,8);

        int     status;
        char *demangled_symbol = abi::__cxa_demangle(sname, 0, 0, &status);
        PrintAttrSpace(msg, "symbol", (demangled_symbol ? (const char*)demangled_symbol : sname));
        free(demangled_symbol);

        msg<<eol;
        lhs<< msg;
    }

    // free mem
    if (shdr)   free(shdr);
    if (symtab) free(symtab);
    if (strtab) free(strtab);
#endif
}

// -----------------------------------------------------------------------------

static void PrintTraceCounters(cstring &out) {
    out << "algo_lib.crash_trace  comment:'Current values of trace counters are shown below'"<<eol;
    ind_beg(algo_lib::_db_imdb_curs,imdb,algo_lib::_db) {
        algo_lib::FImtable* imtable = algo_lib::ind_imtable_Find(tempstr()<<imdb.imdb<<".trace");
        algo::ImrowPtr row = imtable && imtable->c_RowidFind ? imtable->c_RowidFind(0) : algo::ImrowPtr();
        if (imtable && row && imtable->Print) {
            tempstr temp;
            imtable->Print(row, temp);
            int n=0;
            // show non-zero traces in semi-readable form
            ind_beg(algo::Attr_curs,attr,temp) if (ch_N(attr.name) && !(attr.value == "0")) {
                if (n%8==0) {// max key/val attrs per line
                    if (n>0) {
                        out<<eol;
                    }
                    out<<Keyval("",tempstr()<<imdb.imdb<<".trace");
                    n=0;
                }
                out << Keyval(attr.name,attr.value);
                n++;
            }ind_end;
            if (n>0) {
                out<<eol;
            }
        }
    }ind_end;
}

// -----------------------------------------------------------------------------

static void PrintTraces() {
    cstring &out=algo_lib::_db.fatalerr;
    PrintTraceCounters(out);
    algo_lib::h_fatalerror_Call();
}

// -----------------------------------------------------------------------------

void algo::FatalErrorExit(const char *a) NORETURN {
    cstring &out = algo_lib::_db.fatalerr;
    out << "algo.fatal_error" << eol;
    out << a << eol;
    out << " "<<gitinfo_Get()<<eol;
    void* addrlist[64];
    int nsyms = backtrace(addrlist, _array_count(addrlist));
    if (nsyms) {
        BacktraceSymbols_Print(aryptr<void*>(addrlist+1,nsyms-1),out);
    }
    PrintTraces();
    fprintf(stderr,"%s\n",Zeroterm(out));// do not use prerr!
    _exit(1);
}

// -----------------------------------------------------------------------------

// Retrieve instruction pointer from signal handling context
static uintptr_t GetIp(ucontext_t *uc) {
    uintptr_t ret = 0;
#if defined(__FreeBSD__)
    ret = uc->uc_mcontext.mc_rip;
#elif defined(__MACH__)
    ret = uc->uc_mcontext->__ss.__rip;
#else
    ret = uc->uc_mcontext.gregs[REG_RIP];
#endif
    return ret;
}

// -----------------------------------------------------------------------------

// Retrieve instruction pointer from signal handling context, as a string
static tempstr IpString(ucontext_t *uc) {
    tempstr ret;
    u64_PrintHex(GetIp(uc), ret, 8, true);
    return ret;
}

// -----------------------------------------------------------------------------

static void ShowStackTrace(ucontext_t *uc, cstring &out) {
    void* addrlist[64];
    int nsyms = backtrace(addrlist, _array_count(addrlist));
    if (nsyms) {
        // this heuristic is from glibs:
        // Now attempt to locate the PC from signal context in the backtrace.
        // Normally it will be found at arr[2], but it might appear later
        // if there were some signal handler wrappers.  Allow a few bytes
        // difference to cope with as many arches as possible.
        int start = 0 ;
        for (uintptr_t ip=GetIp(uc); ip && start < nsyms; ++start) {
            if ((uintptr_t) addrlist[start] >= ip - 16 && (uintptr_t) addrlist[start] <= ip + 16) {
                break;
            }
        }
        // If we haven't found it, better dump full backtrace even including
        // the signal handler frames instead of not dumping anything.
        if (start == nsyms) {
            start = 0;
        }
        BacktraceSymbols_Print(aryptr<void*>(addrlist+start,nsyms-start),out);
    }
}

// -----------------------------------------------------------------------------

static void Signal(int signal, siginfo_t *_si, void *context) {
    (void)_si;// ignore siginfo
    ucontext_t *uc = (ucontext_t*)context;
    cstring &out=algo_lib::_db.fatalerr;
    out << "algo_lib.signal"
        <<Keyval("local",IpString(uc))
        <<Keyval("text",strsignal(signal))
        <<eol;
    out << algo::gitinfo_Get()<<eol;
    ShowStackTrace(uc,out);
    PrintTraces();
    // do not use prerr, be safe: this function may be called from a thread
    // that doesn't support prerr.
    algo::WriteFile(algo::Fildes(2), (u8*)out.ch_elems, out.ch_n);
    algo::WriteFile(algo::Fildes(2), (u8*)"\n", 1);
    // Pass on the signal (so that a core file is produced).
    struct sigaction sa;
    sa.sa_handler = SIG_DFL;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = 0;
    sigaction(signal, &sa, NULL);
    raise(signal);
}

// catch fatal signals and show backtrace
void algo::SetupFatalSignals() {
    struct sigaction sa;
    sa.sa_sigaction = Signal;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = SA_SIGINFO;
    sigaction(SIGSEGV  ,&sa, NULL);
    sigaction(SIGILL   ,&sa, NULL);
    sigaction(SIGBUS   ,&sa, NULL);
#ifdef __linux__
    // is there a SIG stack fault on MacOS?
    sigaction(SIGSTKFLT,&sa, NULL);
#endif
    sigaction(SIGABRT  ,&sa, NULL);
    sigaction(SIGFPE   ,&sa, NULL);
}
